# Install and load required R packages
library("data.table")
library("dplyr")
library("knitr")
library("ggplot2")

Load model results

load(file = "rdata/mod.clinical-risk.rdata") # generated in clinical-risk.Rmd
load(file = "rdata/mod.demographic-risk.rdata") # generated in demographic-risk.Rmd
load(file = "rdata/mod.disease-course.rdata") # generated in disease-course.Rmd

del(17p)

fit = mod.risk$d17p
psl = rowSums(data.matrix(fit$data[,-1]) %*% diag(coef(fit)[-1])) + coef(fit)[1] # multiple each PC by the PC beta, sum across PCs, add the intercept beta

# check PSL calculation
unique(round(fit$fitted.values,12) == round(exp(psl)/(1+exp(psl)),12)) 
## [1] TRUE
#data.table(PSL=psl,fitted_values=fit$fitted.values,ePSL=exp(psl)/(1+exp(psl))) # view data table 

# split into decile
dcl = cut(psl,quantile(psl,c(0,1/10,2/10,3/10,4/10,5/10,6/10,7/10,8/10,9/10,1)),include.lowest = T,labels = F)
dat = data.table(category=fit$data$D_TRI_CF_ABNORMALITYPR11,psl=psl,decile=dcl)

# plot distribution
ggplot(dat) + geom_dotplot(aes(x=psl,fill=category),binwidth = 1/10,color="white") + theme_classic()

# count number in each category by decile
tmp1 = dat %>% filter(category=="No") %>% group_by(decile) %>% count(category,name = "n_no")
tmp2 = dat %>% filter(category=="Yes") %>% group_by(decile) %>% count(category,name = "n_yes")
cnt = data.table(merge(tmp1,tmp2,by=c("decile"),all = T) %>% dplyr::select("decile","n_no","n_yes"))
kable(cnt,align = 'c',caption = "del(17p) PSL decile counts")
del(17p) PSL decile counts
decile n_no n_yes
1 43 1
2 42 2
3 42 1
4 44 NA
5 38 5
6 36 8
7 37 6
8 41 3
9 35 8
10 26 18
# calculate hazards ratios
a = cnt[decile%in%c(10),n_yes]
b = cnt[decile%in%c(10),n_no]
c = sum(cnt[decile%in%c(5,6),n_yes])
d = sum(cnt[decile%in%c(5,6),n_no])

hr10 = (a/b)/(c/d)
se10 = sqrt((1/a)+(1/b)+(1/c)+(1/d))
ci10_l = exp(log(hr10)-1.96*se10)
ci10_u = exp(log(hr10)+1.96*se10)

paste0("Odds Ratio: del(17p) PSL in decile 10 compared with deciles 5 + 6 = ",round(hr10,digits = 2)," (",round(ci10_l,digits = 2),"-",round(ci10_u,digits = 2),")")
## [1] "Odds Ratio: del(17p) PSL in decile 10 compared with deciles 5 + 6 = 3.94 (1.7-9.14)"

t(14;16)

fit = mod.risk$t1416
psl = rowSums(data.matrix(fit$data[,-1]) %*% diag(coef(fit)[-1])) + coef(fit)[1] # multiple each PC by the PC beta, sum across PCs, add the intercept beta

# check PSL calculation
unique(round(fit$fitted.values,12) == round(exp(psl)/(1+exp(psl)),12)) 

[1] TRUE

#data.table(PSL=psl,fitted_values=fit$fitted.values,ePSL=exp(psl)/(1+exp(psl))) # view data table 

# split into decile
dcl = cut(psl,quantile(psl,c(0,1/10,2/10,3/10,4/10,5/10,6/10,7/10,8/10,9/10,1)),include.lowest = T,labels = F)
dat = data.table(category=fit$data$D_TRI_CF_ABNORMALITYPR8,psl=psl,decile=dcl)

# plot distribution
ggplot(dat) + geom_dotplot(aes(x=psl,fill=category),binwidth = 1/5,color="white") + theme_classic()

# count number in each category by decile
tmp1 = dat %>% filter(category=="No") %>% group_by(decile) %>% count(category,name = "n_no")
tmp2 = dat %>% filter(category=="Yes") %>% group_by(decile) %>% count(category,name = "n_yes")
cnt = data.table(merge(tmp1,tmp2,by=c("decile"),all = T) %>% dplyr::select("decile","n_no","n_yes"))
kable(cnt,align = 'c',caption = "t(14;16) PSL decile counts")
t(14;16) PSL decile counts
decile n_no n_yes
1 56 NA
2 55 NA
3 54 1
4 55 1
5 54 1
6 52 3
7 51 5
8 52 3
9 43 12
10 29 27
# calculate hazards ratios
a = cnt[decile%in%c(10),n_yes]
b = cnt[decile%in%c(10),n_no]
c = sum(cnt[decile%in%c(5,6),n_yes])
d = sum(cnt[decile%in%c(5,6),n_no])

hr10 = (a/b)/(c/d)
se10 = sqrt((1/a)+(1/b)+(1/c)+(1/d))
ci10_l = exp(log(hr10)-1.96*se10)
ci10_u = exp(log(hr10)+1.96*se10)

paste0("Odds Ratio: t(14;16) PSL in decile 10 compared with deciles 5 + 6 = ",round(hr10,digits = 2)," (",round(ci10_l,digits = 2),"-",round(ci10_u,digits = 2),")")

[1] “Odds Ratio: t(14;16) PSL in decile 10 compared with deciles 5 + 6 = 24.67 (7.99-76.19)”

amp(1q)

fit = mod.risk$a1q
psl = rowSums(data.matrix(fit$data[,-1]) %*% diag(coef(fit)[-1])) + coef(fit)[1] # multiple each PC by the PC beta, sum across PCs, add the intercept beta

# check PSL calculation
unique(round(fit$fitted.values,12) == round(exp(psl)/(1+exp(psl)),12)) 
## [1] TRUE
#data.table(PSL=psl,fitted_values=fit$fitted.values,ePSL=exp(psl)/(1+exp(psl))) # view data table 

# split into decile
dcl = cut(psl,quantile(psl,c(0,1/10,2/10,3/10,4/10,5/10,6/10,7/10,8/10,9/10,1)),include.lowest = T,labels = F)
dat = data.table(category=fit$data$D_TRI_CF_ABNORMALITYPR13,psl=psl,decile=dcl)

# plot distribution
ggplot(dat) + geom_dotplot(aes(x=psl,fill=category),binwidth = 1/5,color="white") + theme_classic()

# count number in each category by decile
tmp1 = dat %>% filter(category=="No") %>% group_by(decile) %>% count(category,name = "n_no")
tmp2 = dat %>% filter(category=="Yes") %>% group_by(decile) %>% count(category,name = "n_yes")
cnt = data.table(merge(tmp1,tmp2,by=c("decile"),all = T) %>% dplyr::select("decile","n_no","n_yes"))
kable(cnt,align = 'c',caption = "amp(1q) PSL decile counts")
amp(1q) PSL decile counts
decile n_no n_yes
1 51 2
2 52 NA
3 47 6
4 39 13
5 47 6
6 33 19
7 30 22
8 12 41
9 5 47
10 7 46
# calculate hazards ratios
a = cnt[decile%in%c(10),n_yes]
b = cnt[decile%in%c(10),n_no]
c = sum(cnt[decile%in%c(5,6),n_yes])
d = sum(cnt[decile%in%c(5,6),n_no])

hr10 = (a/b)/(c/d)
se10 = sqrt((1/a)+(1/b)+(1/c)+(1/d))
ci10_l = exp(log(hr10)-1.96*se10)
ci10_u = exp(log(hr10)+1.96*se10)

paste0("Odds Ratio: amp(1q) PSL in decile 10 compared with deciles 5 + 6 = ",round(hr10,digits = 2)," (",round(ci10_l,digits = 2),"-",round(ci10_u,digits = 2),")")
## [1] "Odds Ratio: amp(1q) PSL in decile 10 compared with deciles 5 + 6 = 21.03 (8.44-52.41)"

t(4;14)

fit = mod.risk$t414
psl = rowSums(data.matrix(fit$data[,-1]) %*% diag(coef(fit)[-1])) + coef(fit)[1] # multiple each PC by the PC beta, sum across PCs, add the intercept beta

# check PSL calculation
unique(round(fit$fitted.values,12) == round(exp(psl)/(1+exp(psl)),12)) 
## [1] TRUE
#data.table(PSL=psl,fitted_values=fit$fitted.values,ePSL=exp(psl)/(1+exp(psl))) # view data table 

# split into decile
dcl = cut(psl,quantile(psl,c(0,1/10,2/10,3/10,4/10,5/10,6/10,7/10,8/10,9/10,1)),include.lowest = T,labels = F)
dat = data.table(category=fit$data$D_TRI_CF_ABNORMALITYPR3,psl=psl,decile=dcl)

# plot distribution
ggplot(dat) + geom_dotplot(aes(x=psl,fill=category),binwidth = 1/5,color="white") + theme_classic()

# count number in each category by decile
tmp1 = dat %>% filter(category=="No") %>% group_by(decile) %>% count(category,name = "n_no")
tmp2 = dat %>% filter(category=="Yes") %>% group_by(decile) %>% count(category,name = "n_yes")
cnt = data.table(merge(tmp1,tmp2,by=c("decile"),all = T) %>% dplyr::select("decile","n_no","n_yes"))
kable(cnt,align = 'c',caption = "t(4;14) PSL decile counts")
t(4;14) PSL decile counts
decile n_no n_yes
1 59 1
2 57 2
3 55 4
4 56 4
5 55 4
6 54 5
7 57 3
8 52 7
9 29 30
10 12 48
# calculate hazards ratios
a = cnt[decile%in%c(10),n_yes]
b = cnt[decile%in%c(10),n_no]
c = sum(cnt[decile%in%c(5,6),n_yes])
d = sum(cnt[decile%in%c(5,6),n_no])

hr10 = (a/b)/(c/d)
se10 = sqrt((1/a)+(1/b)+(1/c)+(1/d))
ci10_l = exp(log(hr10)-1.96*se10)
ci10_u = exp(log(hr10)+1.96*se10)

paste0("Odds Ratio: t(4;14) PSL in decile 10 compared with deciles 5 + 6 = ",round(hr10,digits = 2)," (",round(ci10_l,digits = 2),"-",round(ci10_u,digits = 2),")")
## [1] "Odds Ratio: t(4;14) PSL in decile 10 compared with deciles 5 + 6 = 48.44 (19.14-122.61)"

t(11;14)

fit = mod.risk$t1114
psl = rowSums(data.matrix(fit$data[,-1]) %*% diag(coef(fit)[-1])) + coef(fit)[1] # multiple each PC by the PC beta, sum across PCs, add the intercept beta

# check PSL calculation
unique(round(fit$fitted.values,12) == round(exp(psl)/(1+exp(psl)),12)) 
## [1] TRUE
#data.table(PSL=psl,fitted_values=fit$fitted.values,ePSL=exp(psl)/(1+exp(psl))) # view data table 

# split into decile
dcl = cut(psl,quantile(psl,c(0,1/10,2/10,3/10,4/10,5/10,6/10,7/10,8/10,9/10,1)),include.lowest = T,labels = F)
dat = data.table(category=fit$data$D_TRI_CF_ABNORMALITYPR6,psl=psl,decile=dcl)

# plot distribution
ggplot(dat) + geom_dotplot(aes(x=psl,fill=category),binwidth = 1/5,color="white") + theme_classic()

# count number in each category by decile
tmp1 = dat %>% filter(category=="No") %>% group_by(decile) %>% count(category,name = "n_no")
tmp2 = dat %>% filter(category=="Yes") %>% group_by(decile) %>% count(category,name = "n_yes")
cnt = data.table(merge(tmp1,tmp2,by=c("decile"),all = T) %>% dplyr::select("decile","n_no","n_yes"))
kable(cnt,align = 'c',caption = "t(11;14) PSL decile counts")
t(11;14) PSL decile counts
decile n_no n_yes
1 59 1
2 54 5
3 53 6
4 54 6
5 54 5
6 55 4
7 49 11
8 36 23
9 18 41
10 9 51
# calculate hazards ratios
a = cnt[decile%in%c(10),n_yes]
b = cnt[decile%in%c(10),n_no]
c = sum(cnt[decile%in%c(5,6),n_yes])
d = sum(cnt[decile%in%c(5,6),n_no])

hr10 = (a/b)/(c/d)
se10 = sqrt((1/a)+(1/b)+(1/c)+(1/d))
ci10_l = exp(log(hr10)-1.96*se10)
ci10_u = exp(log(hr10)+1.96*se10)

paste0("Odds Ratio: t(11;14) PSL in decile 10 compared with deciles 5 + 6 = ",round(hr10,digits = 2)," (",round(ci10_l,digits = 2),"-",round(ci10_u,digits = 2),")")
## [1] "Odds Ratio: t(11;14) PSL in decile 10 compared with deciles 5 + 6 = 68.63 (25.71-183.22)"

ISS

fit = mod.risk$iss
psl = rowSums(data.matrix(fit$model[,-1]) %*% diag(coef(fit))) # multiple each PC by the PC beta, sum across PCs

# check PSL calculation
unique(round(fit$lp,12) == round(psl,12)) 
## [1] TRUE
#data.table(psl,fit$lp)

# split into deciles
dcl = cut(psl,quantile(psl,c(0,1/10,2/10,3/10,4/10,5/10,6/10,7/10,8/10,9/10,1)),include.lowest = T,labels = F)
dat = data.table(category=fit$model$ISS,psl=psl,decile=dcl)

# plot distribution
ggplot(dat) + geom_dotplot(aes(x=psl,fill=category),binwidth = 1/10,color="white") + theme_classic()

# count number in each category by decile
tmp1 = dat %>% filter(category==1) %>% group_by(decile) %>% count(category,name = "n_1")
tmp2 = dat %>% filter(category==2) %>% group_by(decile) %>% count(category,name = "n_2")
tmp3 = dat %>% filter(category==3) %>% group_by(decile) %>% count(category,name = "n_3")
cnt = data.table(merge(merge(tmp1,tmp2,by=c("decile"),all = T),tmp3,by="decile",all=T))[,c("decile","n_1","n_2","n_3")]
kable(cnt,align = 'c',caption = "ISS PSL decile counts")
ISS PSL decile counts
decile n_1 n_2 n_3
1 52 17 6
2 36 33 6
3 37 26 11
4 30 29 16
5 34 27 13
6 26 30 19
7 17 32 25
8 8 39 28
9 14 21 39
10 10 15 50
# calculate hazards ratios
a = cnt[decile%in%c(10),n_3]
b = cnt[decile%in%c(10),n_1]
c = sum(cnt[decile%in%c(5,6),n_3])
d = sum(cnt[decile%in%c(5,6),n_1])

hr10 = (a/b)/(c/d)
se10 = sqrt((1/a)+(1/b)+(1/c)+(1/d))
ci10_l = exp(log(hr10)-1.96*se10)
ci10_u = exp(log(hr10)+1.96*se10)

paste0("Odds Ratio: ISS PSL in decile 10 compared with deciles 5 + 6 for stage 1 v 3 = ",round(hr10,digits = 2)," (",round(ci10_l,digits = 2),"-",round(ci10_u,digits = 2),")")
## [1] "Odds Ratio: ISS PSL in decile 10 compared with deciles 5 + 6 for stage 1 v 3 = 9.38 (4.2-20.93)"

Overall Survival (OS)

# multiply each PC by the PC beta, sum across PCs
psl = rowSums(data.matrix(os$data[,-c(1:2)]) %*% diag(coef(os$coxph))) 

# check PSL calculation
unique(round(os$coxph$linear.predictors,12) == round(psl,12)) 
## [1] TRUE
#data.table(psl,os$coxph$linear.predictors)

# split into deciles
dcl = cut(psl,quantile(psl,c(0,1/10,2/10,3/10,4/10,5/10,6/10,7/10,8/10,9/10,1)),
          include.lowest = T,labels = F)
dat = data.table(os$data[,c(1:2)],psl=psl,decile=dcl)

OS at 1 year

# is patient dead, alive, or last seen prior to cutoff?
dat$category = if_else(dat$ttcos<=365 & dat$censos==1,"dead",
                       if_else(dat$ttcos>365,"alive","censored")) 
dat
##      ttcos censos        psl decile category
##   1:  2300      0 -0.3011479      5    alive
##   2:  2263      0 -0.9262697      2    alive
##   3:   995      0 -1.8710216      1    alive
##   4:  2348      0 -1.6208278      1    alive
##   5:  1670      1  1.9406145     10    alive
##  ---                                        
## 763:   203      0 -0.1891478      5 censored
## 764:   163      0  1.3326660     10 censored
## 765:   198      0 -0.2040768      5 censored
## 766:   189      0 -0.6716711      3 censored
## 767:   170      0  0.7548512      9 censored
# plot distribution
ggplot(dat) + geom_dotplot(aes(x=psl,fill=category),binwidth = 1/10,color="white") + theme_classic()

# count number in each category by decile
tmp1 = dat %>% filter(category=="dead") %>% group_by(decile) %>% count(category,name = "n_dead")
tmp2 = dat %>% filter(category=="alive") %>% group_by(decile) %>% count(category,name = "n_alive")
tmp3 = dat %>% filter(category=="censored") %>% group_by(decile) %>% count(category,name="n_censored")
cnt = data.table(merge(merge(tmp1,tmp2,by=c("decile"),all = T),tmp3,by="decile",all=T))[,c("decile","n_dead","n_alive","n_censored")]

# average days to death per decile - ignore censored values
avg_all = dat %>% filter() %>% group_by(decile) %>% summarise_at(vars(ttcos),mean) 
avg_c1 = dat %>% filter(censos==1) %>% group_by(decile) %>% summarise_at(vars(ttcos),mean)
avg_c0 = dat %>% filter(censos==0) %>% group_by(decile) %>% summarise_at(vars(ttcos),mean)

# number of events per decile
evts = dat %>% filter(censos==1) %>% group_by(decile) %>% count(censos,name="n_events")  
c2 = data.table(merge(merge(avg_all,avg_c0,by = "decile"),merge(avg_c1,evts,by="decile")))[,-"censos"]
colnames(c2) = c("decile","avg_days","avg_days_censored","avg_days_dead","events")

kable(merge(cnt,c2,by="decile"),align = 'c',
      caption = "Overall Surival PSL decile counts at 365 days")
Overall Surival PSL decile counts at 365 days
decile n_dead n_alive n_censored avg_days avg_days_censored avg_days_dead events
1 2 66 9 1123.8701 1188.0714 481.8571 7
2 2 64 11 1146.6234 1183.7500 612.0000 5
3 2 62 12 1059.1316 1101.6818 778.3000 10
4 3 61 13 1072.0130 1125.2388 715.4000 10
5 4 58 15 1024.0649 1089.6667 630.4545 11
6 3 61 12 915.7105 945.5714 771.0000 13
7 3 67 7 1042.2857 1125.1967 726.1875 16
8 7 60 9 987.4737 1085.6863 787.1200 25
9 10 58 9 843.5065 988.4444 639.6875 32
10 22 38 17 507.9610 551.6296 484.3800 50
# calculate hazards ratios
a = cnt[decile%in%c(10),n_dead]
b = cnt[decile%in%c(10),n_alive]
c = sum(cnt[decile%in%c(5,6),n_dead])
d = sum(cnt[decile%in%c(5,6),n_alive])

hr10 = (a/b)/(c/d)
se10 = sqrt((1/a)+(1/b)+(1/c)+(1/d))
ci10_l = exp(log(hr10)-1.96*se10)
ci10_u = exp(log(hr10)+1.96*se10)

paste0("HR OS PSL decile 10 compared with deciles 5 + 6 at 365 days = ",round(hr10,digits = 2)," (",round(ci10_l,digits = 2),"-",round(ci10_u,digits = 2),")")
## [1] "HR OS PSL decile 10 compared with deciles 5 + 6 at 365 days = 9.84 (3.9-24.84)"

OS at 3 years

# is patient dead, alive, or last seen prior to cutoff
dat$category = if_else(dat$ttcos<=365*3 & dat$censos==1,"dead",
                       if_else(dat$ttcos>365*3,"alive","censored")) 
dat
##      ttcos censos        psl decile category
##   1:  2300      0 -0.3011479      5    alive
##   2:  2263      0 -0.9262697      2    alive
##   3:   995      0 -1.8710216      1 censored
##   4:  2348      0 -1.6208278      1    alive
##   5:  1670      1  1.9406145     10    alive
##  ---                                        
## 763:   203      0 -0.1891478      5 censored
## 764:   163      0  1.3326660     10 censored
## 765:   198      0 -0.2040768      5 censored
## 766:   189      0 -0.6716711      3 censored
## 767:   170      0  0.7548512      9 censored
# plot distribution
ggplot(dat) + geom_dotplot(aes(x=psl,fill=category),binwidth = 1/10,color="white") + theme_classic()

# count number in each category by decile
tmp1 = dat %>% filter(category=="dead") %>% group_by(decile) %>% count(category,name = "n_dead")
tmp2 = dat %>% filter(category=="alive") %>% group_by(decile) %>% count(category,name = "n_alive")
tmp3 = dat %>% filter(category=="censored") %>% group_by(decile) %>% count(category,name="n_censored")
cnt = data.table(merge(merge(tmp1,tmp2,by=c("decile"),all = T),tmp3,by="decile",all=T))[,c("decile","n_dead","n_alive","n_censored")]

# average days to death per decile - ignore censored values
avg_all = dat %>% filter() %>% group_by(decile) %>% summarise_at(vars(ttcos),mean) 
avg_c1 = dat %>% filter(censos==1) %>% group_by(decile) %>% summarise_at(vars(ttcos),mean)
avg_c0 = dat %>% filter(censos==0) %>% group_by(decile) %>% summarise_at(vars(ttcos),mean)

# number of events per decile
evts = dat %>% filter(censos==1) %>% group_by(decile) %>% count(censos,name="n_events")  
c2 = data.table(merge(merge(avg_all,avg_c0,by = "decile"),merge(avg_c1,evts,by="decile")))[,-"censos"]
colnames(c2) = c("decile","avg_days","avg_days_censored","avg_days_dead","events")

kable(merge(cnt,c2,by="decile"),align = 'c',
      caption = "Overall Surival PSL decile counts at 3 years")
Overall Surival PSL decile counts at 3 years
decile n_dead n_alive n_censored avg_days avg_days_censored avg_days_dead events
1 7 43 27 1123.8701 1188.0714 481.8571 7
2 4 51 22 1146.6234 1183.7500 612.0000 5
3 7 43 26 1059.1316 1101.6818 778.3000 10
4 9 39 29 1072.0130 1125.2388 715.4000 10
5 9 38 30 1024.0649 1089.6667 630.4545 11
6 10 29 37 915.7105 945.5714 771.0000 13
7 12 38 27 1042.2857 1125.1967 726.1875 16
8 19 33 24 987.4737 1085.6863 787.1200 25
9 27 28 22 843.5065 988.4444 639.6875 32
10 45 10 22 507.9610 551.6296 484.3800 50
# calculate hazards ratios
a = cnt[decile%in%c(10),n_dead]
b = cnt[decile%in%c(10),n_alive]
c = sum(cnt[decile%in%c(5,6),n_dead])
d = sum(cnt[decile%in%c(5,6),n_alive])

hr10 = (a/b)/(c/d)
se10 = sqrt((1/a)+(1/b)+(1/c)+(1/d))
ci10_l = exp(log(hr10)-1.96*se10)
ci10_u = exp(log(hr10)+1.96*se10)

paste0("HR OS PSL decile 10 compared with deciles 5 + 6 at ",365*3," days = ",round(hr10,digits = 2)," (",round(ci10_l,digits = 2),"-",round(ci10_u,digits = 2),")")
## [1] "HR OS PSL decile 10 compared with deciles 5 + 6 at 1095 days = 15.87 (6.76-37.27)"

Time to first line treatment failure (TTF)

tf_psl = rowSums(data.matrix(tf$data[,-c(1:2)]) %*% diag(coef(tf$coxph))) # multiple each PC by the PC beta, sum across PCs

# check PSL calculation
unique(round(tf$coxph$linear.predictors,12) == round(tf_psl,12)) 
## [1] TRUE
#data.table(tf1_psl,tf1$coxph$linear.predictors)

# split into decile
dcl = cut(psl,quantile(psl,c(0,1/10,2/10,3/10,4/10,5/10,6/10,7/10,8/10,9/10,1)),include.lowest = T,labels = F)
dat = data.table(tf$data[,c(1:2)],psl=psl,decile=dcl)

TTF at 1 year

dat$category = if_else(dat$ttctf1<=365 & dat$censtf1==1,"fail",if_else(dat$ttctf1>365,"okay","censored")) 
# describe if treatment has failed, not failed, or patient data censored
dat
##      ttctf1 censtf1        psl decile category
##   1:     84       1 -0.3011479      5     fail
##   2:    582       1 -0.9262697      2     okay
##   3:    995       0 -1.8710216      1     okay
##   4:    283       1 -1.6208278      1     fail
##   5:   1329       1  1.9406145     10     okay
##  ---                                          
## 763:     57       1 -0.1891478      5     fail
## 764:     58       1  1.3326660     10     fail
## 765:    198       0 -0.2040768      5 censored
## 766:    189       0 -0.6716711      3 censored
## 767:    170       0  0.7548512      9 censored
# plot distribution
ggplot(dat) + geom_dotplot(aes(x=psl,fill=category),binwidth = 1/10,color="white") + theme_classic()

# count number in each category by decile
tmp1 = dat %>% filter(category=="fail") %>% group_by(decile) %>% count(category,name = "n_fail")
tmp2 = dat %>% filter(category=="okay") %>% group_by(decile) %>% count(category,name = "n_okay")
tmp3 = dat %>% filter(category=="censored") %>% group_by(decile) %>% count(category,name="n_censored")
cnt = data.table(merge(merge(tmp1,tmp2,by=c("decile"),all = T),tmp3,by="decile",all=T))[,c("decile","n_fail","n_okay","n_censored")]

# average days to death per decile - ignore censored values
avg_all = dat %>% filter() %>% group_by(decile) %>% summarise_at(vars(ttctf1),mean) 
avg_c1 = dat %>% filter(censtf1==1) %>% group_by(decile) %>% summarise_at(vars(ttctf1),mean)
avg_c0 = dat %>% filter(censtf1==0) %>% group_by(decile) %>% summarise_at(vars(ttctf1),mean)

# number of events per decile
evts = dat %>% filter(censtf1==1) %>% group_by(decile) %>% count(censtf1,name="n_events")  
c2 = data.table(merge(merge(avg_all,avg_c0,by = "decile"),merge(avg_c1,evts,by="decile")))[,-"censtf1"]
colnames(c2) = c("decile","avg_days","avg_days_censored","avg_days_fail","events")

kable(merge(cnt,c2,by="decile"),align = 'c',
      caption = "Time to first treatment fail PSL decile counts at 365 days") 
Time to first treatment fail PSL decile counts at 365 days
decile n_fail n_okay n_censored avg_days avg_days_censored avg_days_fail events
1 17 51 9 759.5455 979.2000 450.6562 32
2 10 57 10 859.4416 1044.6471 496.1538 26
3 14 49 13 763.8947 921.4222 535.2258 31
4 15 51 11 769.5195 947.0682 532.7879 33
5 18 45 14 659.3117 844.5714 437.0000 35
6 16 47 13 665.6316 758.8462 567.3784 37
7 19 50 8 718.9351 933.0000 520.9250 40
8 20 45 11 618.7500 786.5556 467.7250 40
9 27 38 12 484.2338 664.8889 386.6800 50
10 31 21 25 319.8442 307.3438 328.7333 45
# calculate hazards ratios
a = cnt[decile%in%c(10),n_fail]
b = cnt[decile%in%c(10),n_okay]
c = sum(cnt[decile%in%c(5,6),n_fail])
d = sum(cnt[decile%in%c(5,6),n_okay])

hr10 = (a/b)/(c/d)
se10 = sqrt((1/a)+(1/b)+(1/c)+(1/d))
ci10_l = exp(log(hr10)-1.96*se10)
ci10_u = exp(log(hr10)+1.96*se10)

paste0("HR TTF PSL decile 10 compared with deciles 5 + 6 at 365 days = ",
       round(hr10,digits = 2)," (",round(ci10_l,digits = 2),"-",round(ci10_u,digits = 2),")")
## [1] "HR TTF PSL decile 10 compared with deciles 5 + 6 at 365 days = 3.99 (2.02-7.88)"

TTF at 3 years

dat$category = if_else(dat$ttctf1<=365*3 & dat$censtf1==1,"fail",if_else(dat$ttctf1>365*3,"okay","censored")) 
# describe if treatment has failed, not failed, or patient data censored
dat
##      ttctf1 censtf1        psl decile category
##   1:     84       1 -0.3011479      5     fail
##   2:    582       1 -0.9262697      2     fail
##   3:    995       0 -1.8710216      1 censored
##   4:    283       1 -1.6208278      1     fail
##   5:   1329       1  1.9406145     10     okay
##  ---                                          
## 763:     57       1 -0.1891478      5     fail
## 764:     58       1  1.3326660     10     fail
## 765:    198       0 -0.2040768      5 censored
## 766:    189       0 -0.6716711      3 censored
## 767:    170       0  0.7548512      9 censored
# plot distribution
ggplot(dat) + geom_dotplot(aes(x=psl,fill=category),binwidth = 1/10,color="white") + theme_classic()

# count number in each category by decile
tmp1 = dat %>% filter(category=="fail") %>% group_by(decile) %>% count(category,name = "n_fail")
tmp2 = dat %>% filter(category=="okay") %>% group_by(decile) %>% count(category,name = "n_okay")
tmp3 = dat %>% filter(category=="censored") %>% group_by(decile) %>% count(category,name="n_censored")
cnt = data.table(merge(merge(tmp1,tmp2,by=c("decile"),all = T),tmp3,by="decile",all=T))[,c("decile","n_fail","n_okay","n_censored")]

# average days to death per decile - ignore censored values
avg_all = dat %>% filter() %>% group_by(decile) %>% summarise_at(vars(ttctf1),mean) 
avg_c1 = dat %>% filter(censtf1==1) %>% group_by(decile) %>% summarise_at(vars(ttctf1),mean)
avg_c0 = dat %>% filter(censtf1==0) %>% group_by(decile) %>% summarise_at(vars(ttctf1),mean)

# number of events per decile
evts = dat %>% filter(censtf1==1) %>% group_by(decile) %>% count(censtf1,name="n_events")  
c2 = data.table(merge(merge(avg_all,avg_c0,by = "decile"),merge(avg_c1,evts,by="decile")))[,-"censtf1"]
colnames(c2) = c("decile","avg_days","avg_days_censored","avg_days_fail","events")

# remove censored before X days
kable(merge(cnt,c2,by="decile"),align = 'c',
      caption = "Time to first treatment fail PSL decile counts at 3 years") 
Time to first treatment fail PSL decile counts at 3 years
decile n_fail n_okay n_censored avg_days avg_days_censored avg_days_fail events
1 28 23 26 759.5455 979.2000 450.6562 32
2 26 31 20 859.4416 1044.6471 496.1538 26
3 29 25 22 763.8947 921.4222 535.2258 31
4 28 24 25 769.5195 947.0682 532.7879 33
5 33 19 25 659.3117 844.5714 437.0000 35
6 33 17 26 665.6316 758.8462 567.3784 37
7 36 17 24 718.9351 933.0000 520.9250 40
8 36 14 26 618.7500 786.5556 467.7250 40
9 49 8 20 484.2338 664.8889 386.6800 50
10 42 5 30 319.8442 307.3438 328.7333 45
# calculate hazards ratios
a = cnt[decile%in%c(10),n_fail]
b = cnt[decile%in%c(10),n_okay]
c = sum(cnt[decile%in%c(5,6),n_fail])
d = sum(cnt[decile%in%c(5,6),n_okay])

hr10 = (a/b)/(c/d)
se10 = sqrt((1/a)+(1/b)+(1/c)+(1/d))
ci10_l = exp(log(hr10)-1.96*se10)
ci10_u = exp(log(hr10)+1.96*se10)

paste0("HR TTF PSL decile 10 compared with deciles 5 + 6 at ",365*3,
       " days = ",round(hr10,digits = 2)," (",
       round(ci10_l,digits = 2),"-",round(ci10_u,digits = 2),")")
## [1] "HR TTF PSL decile 10 compared with deciles 5 + 6 at 1095 days = 4.58 (1.66-12.61)"

Age

fit = lm.age
psl = rowSums(data.matrix(fit$model[,-1]) %*% diag(coef(fit)[-1])) + coef(fit)[1] # multiple each PC by the PC beta, sum across PCs, add the intercept beta

# check PSL calculation
unique(round(fit$fitted.values,10) == round(psl,10)) 
## [1] TRUE
#data.table(PSL=psl,fitted_values=fit$fitted.values) # view data table 

# split into decile
dcl = cut(psl,quantile(psl,c(0,1/10,2/10,3/10,4/10,5/10,6/10,7/10,8/10,9/10,1)),include.lowest = T,labels = F)
dat = data.table(age=fit$model$D_PT_age,psl=psl,decile=dcl)
dat$category = as.factor(if_else(dat$age<60,1,2)) # two categories: <60 or >=60 at diagnosis

# plot distribution
ggplot(dat) + geom_dotplot(aes(x=psl,fill=category),binwidth = 1/2,color="white") + theme_classic()

# count number in each category by decile
tmp1 = dat %>% filter(category=="1") %>% group_by(decile) %>% count(category,name = "n_lt60")
tmp2 = dat %>% filter(category=="2") %>% group_by(decile) %>% count(category,name = "n_ge60")
avg = dat %>% filter() %>% group_by(decile) %>% summarise_at(vars(age),mean) # average days to death per decile - ignore censored values
cnt = data.table(merge(merge(tmp1,tmp2,by=c("decile"),all = T),avg,by="decile"))[,c("decile","n_lt60","n_ge60","age")]
kable(cnt,align = 'c',caption = "Age PSL decile counts")
Age PSL decile counts
decile n_lt60 n_ge60 age
1 52 25 54.57143
2 38 39 58.62338
3 33 43 59.61842
4 32 45 60.53247
5 27 50 62.58442
6 33 43 62.35526
7 21 56 64.98701
8 16 60 66.38158
9 17 60 67.59740
10 5 72 70.46753
# calculate hazards ratios
a = cnt[decile%in%c(1),n_lt60]
b = cnt[decile%in%c(1),n_ge60]
c = sum(cnt[decile%in%c(5,6),n_lt60])
d = sum(cnt[decile%in%c(5,6),n_ge60])

hr10 = (a/b)/(c/d)
se10 = sqrt((1/a)+(1/b)+(1/c)+(1/d))
ci10_l = exp(log(hr10)-1.96*se10)
ci10_u = exp(log(hr10)+1.96*se10)

paste0("Odds Ratio: Age PSL in decile 1 compared with deciles 5 + 6 for <60 or >=60 = ",round(hr10,digits = 2)," (",round(ci10_l,digits = 2),"-",round(ci10_u,digits = 2),")")
## [1] "Odds Ratio: Age PSL in decile 1 compared with deciles 5 + 6 for <60 or >=60 = 3.22 (1.81-5.74)"

Gender

fit = glm.gender
psl = rowSums(data.matrix(fit$data[,-1]) %*% diag(coef(fit)[-1])) + coef(fit)[1] # multiple each PC by the PC beta, sum across PCs, add the intercept beta

# check PSL calculation
unique(round(fit$fitted.values,12) == round(exp(psl)/(1+exp(psl)),12)) 
## [1] TRUE
#data.table(PSL=psl,fitted_values=fit$fitted.values,ePSL=exp(psl)/(1+exp(psl))) # view data table 

# split into decile
dcl = cut(psl,quantile(psl,c(0,1/10,2/10,3/10,4/10,5/10,6/10,7/10,8/10,9/10,1)),include.lowest = T,labels = F)
dat = data.table(category=fit$data$D_PT_gender,psl=psl,decile=dcl)

# plot distribution
ggplot(dat) + geom_dotplot(aes(x=psl,fill=category),binwidth = 1/10,color="white") + theme_classic()

# count number in each category by decile
tmp1 = dat %>% filter(category=="1") %>% group_by(decile) %>% count(category,name = "n_1")
tmp2 = dat %>% filter(category=="2") %>% group_by(decile) %>% count(category,name = "n_2")
cnt = data.table(merge(tmp1,tmp2,by=c("decile"),all = T) %>% select("decile","n_1","n_2"))
kable(cnt,align = 'c',caption = "Gender PSL decile counts")
Gender PSL decile counts
decile n_1 n_2
1 65 12
2 67 10
3 52 24
4 54 23
5 48 29
6 41 35
7 35 42
8 41 35
9 33 44
10 17 60
# calculate hazards ratios
a = cnt[decile%in%c(10),n_2]
b = cnt[decile%in%c(10),n_1]
c = sum(cnt[decile%in%c(5,6),n_2])
d = sum(cnt[decile%in%c(5,6),n_1])

hr10 = (a/b)/(c/d)
se10 = sqrt((1/a)+(1/b)+(1/c)+(1/d))
ci10_l = exp(log(hr10)-1.96*se10)
ci10_u = exp(log(hr10)+1.96*se10)

paste0("Odds Ratio: Gender PSL in decile 10 compared with deciles 5 + 6 = ",round(hr10,digits = 2)," (",round(ci10_l,digits = 2),"-",round(ci10_u,digits = 2),")")
## [1] "Odds Ratio: Gender PSL in decile 10 compared with deciles 5 + 6 = 4.91 (2.62-9.19)"

Race

fit = glm.race
psl = rowSums(data.matrix(fit$data[,-1]) %*% diag(coef(fit)[-1])) + coef(fit)[1] # multiple each PC by the PC beta, sum across PCs, add the intercept beta

# check PSL calculation
unique(round(fit$fitted.values,12) == round(exp(psl)/(1+exp(psl)),12)) 
## [1] TRUE
#data.table(PSL=psl,fitted_values=fit$fitted.values,ePSL=exp(psl)/(1+exp(psl))) # view data table 

# split into decile
dcl = cut(psl,quantile(psl,c(0,1/10,2/10,3/10,4/10,5/10,6/10,7/10,8/10,9/10,1)),include.lowest = T,labels = F)
dat = data.table(category=fit$data$D_PT_race,psl=psl,decile=dcl)

# plot distribution
ggplot(dat) + geom_dotplot(aes(x=psl,fill=category),binwidth = 1/5,color="white") + theme_classic()

# count number in each category by decile
tmp1 = dat %>% filter(category=="1") %>% group_by(decile) %>% count(category,name = "n_1")
tmp2 = dat %>% filter(category=="2") %>% group_by(decile) %>% count(category,name = "n_2")
cnt = data.table(merge(tmp1,tmp2,by=c("decile"),all = T) %>% select("decile","n_1","n_2"))
kable(cnt,align = 'c',caption = "Race PSL decile counts")
Race PSL decile counts
decile n_1 n_2
1 61 NA
2 57 3
3 57 3
4 57 3
5 54 6
6 53 7
7 52 8
8 42 18
9 40 20
10 28 32
# calculate hazards ratios
a = cnt[decile%in%c(10),n_2]
b = cnt[decile%in%c(10),n_1]
c = sum(cnt[decile%in%c(5,6),n_2])
d = sum(cnt[decile%in%c(5,6),n_1])

hr10 = (a/b)/(c/d)
se10 = sqrt((1/a)+(1/b)+(1/c)+(1/d))
ci10_l = exp(log(hr10)-1.96*se10)
ci10_u = exp(log(hr10)+1.96*se10)

paste0("Odds Ratio: Race PSL in decile 10 compared with deciles 5 + 6 = ",round(hr10,digits = 2)," (",round(ci10_l,digits = 2),"-",round(ci10_u,digits = 2),")")
## [1] "Odds Ratio: Race PSL in decile 10 compared with deciles 5 + 6 = 9.41 (4.37-20.26)"

Ethnicity

fit = glm.ethnic
psl = rowSums(data.matrix(fit$data[,-1]) %*% diag(coef(fit)[-1])) + coef(fit)[1] # multiple each PC by the PC beta, sum across PCs, add the intercept beta

# check PSL calculation
unique(round(fit$fitted.values,12) == round(exp(psl)/(1+exp(psl)),12)) 
## [1] TRUE
#data.table(PSL=psl,fitted_values=fit$fitted.values,ePSL=exp(psl)/(1+exp(psl))) # view data table 

# split into decile
dcl = cut(psl,quantile(psl,c(0,1/10,2/10,3/10,4/10,5/10,6/10,7/10,8/10,9/10,1)),include.lowest = T,labels = F)
dat = data.table(category=fit$data$D_PT_ethnic,psl=psl,decile=dcl)

# plot distribution
ggplot(dat) + geom_dotplot(aes(x=psl,fill=category),binwidth = 1/5,color="white") + theme_classic()

# count number in each category by decile
tmp1 = dat %>% filter(category=="1") %>% group_by(decile) %>% count(category,name = "n_1")
tmp2 = dat %>% filter(category=="2") %>% group_by(decile) %>% count(category,name = "n_2")
cnt = data.table(merge(tmp1,tmp2,by=c("decile"),all = T) %>% select("decile","n_1","n_2"))
kable(cnt,align = 'c',caption = "Ethnicity PSL decile counts")
Ethnicity PSL decile counts
decile n_1 n_2
1 28 35
2 6 56
3 6 57
4 4 58
5 7 56
6 3 59
7 2 60
8 2 61
9 1 61
10 NA 63
cnt[is.na(cnt)] <- 0 # replace NA with 0

# calculate hazards ratios
a = cnt[decile%in%c(1),n_1]
b = cnt[decile%in%c(1),n_2]
c = sum(cnt[decile%in%c(5,6),n_1])
d = sum(cnt[decile%in%c(5,6),n_2])

hr10 = (a/b)/(c/d)
se10 = sqrt((1/a)+(1/b)+(1/c)+(1/d))
ci10_l = exp(log(hr10)-1.96*se10)
ci10_u = exp(log(hr10)+1.96*se10)

paste0("Odds Ratio: Ethnicity PSL in decile 1 compared with deciles 5 + 6 = ",round(hr10,digits = 2)," (",round(ci10_l,digits = 2),"-",round(ci10_u,digits = 2),")")
## [1] "Odds Ratio: Ethnicity PSL in decile 1 compared with deciles 5 + 6 = 9.2 (4.07-20.79)"